[Clang][X86] Replace F16C `vcvtph2ps/256` intrinsics with `(convert|shuffle)vector` builtins #152911

moorabbit · 2025-08-10T11:35:22Z

The following intrinsics were replaced by a combination of __builtin_shufflevector and __builtin_convertvector:

__builtin_ia32_vcvtph2ps
__builtin_ia32_vcvtph2ps256

…onvertvector

The following intrinsics were replaced by a combination of `__builtin_shufflevector` and `__builtin_convertvector`:
- `__builtin_ia32_vcvtph2ps`
- `__builtin_ia32_vcvtph2ps256`

Fixes llvm#152749

llvmbot · 2025-08-10T11:35:52Z

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: None (moorabbit)

Changes

The following intrinsics were replaced by a combination of __builtin_shufflevector and __builtin_convertvector:

__builtin_ia32_vcvtph2ps
__builtin_ia32_vcvtph2ps256

Fixes #152749

Full diff: https://github.com/llvm/llvm-project/pull/152911.diff

6 Files Affected:

(modified) clang/include/clang/Basic/BuiltinsX86.td (-8)
(modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (-2)
(modified) clang/lib/Headers/emmintrin.h (+5)
(modified) clang/lib/Headers/f16cintrin.h (+5-3)
(modified) clang/test/CodeGen/X86/f16c-builtins-constrained.c (+2-2)
(modified) clang/test/CodeGen/X86/f16c-builtins.c (+2-2)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3efc0be8fa698..fc1ee3be7889f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -757,14 +757,6 @@ let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
 }
 
-let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vcvtph2ps : X86Builtin<"_Vector<4, float>(_Vector<8, short>)">;
-}
-
-let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vcvtph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, short>)">;
-}
-
 let Features = "rdrnd", Attributes = [NoThrow] in {
   def rdrand16_step : X86Builtin<"unsigned int(unsigned short *)">;
   def rdrand32_step : X86Builtin<"unsigned int(unsigned int *)">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index b508709e4bbae..f8d451bd20fa3 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -2841,8 +2841,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
 
   // f16c half2float intrinsics
-  case X86::BI__builtin_ia32_vcvtph2ps:
-  case X86::BI__builtin_ia32_vcvtph2ps256:
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
   case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 60d2000dfb809..9b5567396f60b 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -40,12 +40,17 @@ typedef signed char __v16qs __attribute__((__vector_size__(16)));
 
 #ifdef __SSE2__
 /* Both _Float16 and __bf16 require SSE2 being enabled. */
+typedef _Float16 __v4hf __attribute__((__vector_size__(8)));
 typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
 typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
 typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
 
 typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16)));
 typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
+#else
+/* Use __fp16 when _Float16 is not supported. */
+typedef __fp16 __v4hf __attribute__((__vector_size__(8)));
+typedef __fp16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
 #endif
 
 /* Define the default attributes for the functions in this file. */
diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h
index 94a662c1d93a8..98b25f5a56953 100644
--- a/clang/lib/Headers/f16cintrin.h
+++ b/clang/lib/Headers/f16cintrin.h
@@ -39,7 +39,8 @@ static __inline float __DEFAULT_FN_ATTRS128
 _cvtsh_ss(unsigned short __a)
 {
   __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
-  __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
+  __v4hi __w = __builtin_shufflevector(__v, __v, 0, 1, 2, 3);
+  __v4sf __r = __builtin_convertvector((__v4hf)__w, __v4sf);
   return __r[0];
 }
 
@@ -109,7 +110,8 @@ _cvtsh_ss(unsigned short __a)
 static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtph_ps(__m128i __a)
 {
-  return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
+  __v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3);
+  return __builtin_convertvector((__v4hf)__v, __v4sf);
 }
 
 /// Converts a 256-bit vector of [8 x float] into a 128-bit vector
@@ -153,7 +155,7 @@ _mm_cvtph_ps(__m128i __a)
 static __inline __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtph_ps(__m128i __a)
 {
-  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
+  return __builtin_convertvector((__v8hf)__a, __v8sf);
 }
 
 #undef __DEFAULT_FN_ATTRS128
diff --git a/clang/test/CodeGen/X86/f16c-builtins-constrained.c b/clang/test/CodeGen/X86/f16c-builtins-constrained.c
index bbd4d8f83b53a..5716f09a677f5 100644
--- a/clang/test/CodeGen/X86/f16c-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/f16c-builtins-constrained.c
@@ -16,7 +16,7 @@ float test_cvtsh_ss(unsigned short a) {
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _cvtsh_ss(a);
@@ -38,7 +38,7 @@ unsigned short test_cvtss_sh(float a) {
 
 __m128 test_mm_cvtph_ps(__m128i a) {
   // CHECK-LABEL: test_mm_cvtph_ps
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: call {{.*}}<4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
   return _mm_cvtph_ps(a);
 }
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
index 3c6d64c225b32..61ffa24e6f5c7 100644
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -16,7 +16,7 @@ float test_cvtsh_ss(unsigned short a) {
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: fpext <4 x half> %{{.*}} to <4 x float>
   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _cvtsh_ss(a);
@@ -35,7 +35,7 @@ unsigned short test_cvtss_sh(float a) {
 
 __m128 test_mm_cvtph_ps(__m128i a) {
   // CHECK-LABEL: test_mm_cvtph_ps
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: fpext <4 x half> %{{.*}} to <4 x float>
   return _mm_cvtph_ps(a);
 }

llvmbot · 2025-08-10T11:35:53Z

@llvm/pr-subscribers-clang-codegen

Author: None (moorabbit)

Changes

The following intrinsics were replaced by a combination of __builtin_shufflevector and __builtin_convertvector:

__builtin_ia32_vcvtph2ps
__builtin_ia32_vcvtph2ps256

Fixes #152749

Full diff: https://github.com/llvm/llvm-project/pull/152911.diff

6 Files Affected:

(modified) clang/include/clang/Basic/BuiltinsX86.td (-8)
(modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (-2)
(modified) clang/lib/Headers/emmintrin.h (+5)
(modified) clang/lib/Headers/f16cintrin.h (+5-3)
(modified) clang/test/CodeGen/X86/f16c-builtins-constrained.c (+2-2)
(modified) clang/test/CodeGen/X86/f16c-builtins.c (+2-2)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3efc0be8fa698..fc1ee3be7889f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -757,14 +757,6 @@ let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
 }
 
-let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vcvtph2ps : X86Builtin<"_Vector<4, float>(_Vector<8, short>)">;
-}
-
-let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vcvtph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, short>)">;
-}
-
 let Features = "rdrnd", Attributes = [NoThrow] in {
   def rdrand16_step : X86Builtin<"unsigned int(unsigned short *)">;
   def rdrand32_step : X86Builtin<"unsigned int(unsigned int *)">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index b508709e4bbae..f8d451bd20fa3 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -2841,8 +2841,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
 
   // f16c half2float intrinsics
-  case X86::BI__builtin_ia32_vcvtph2ps:
-  case X86::BI__builtin_ia32_vcvtph2ps256:
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
   case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 60d2000dfb809..9b5567396f60b 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -40,12 +40,17 @@ typedef signed char __v16qs __attribute__((__vector_size__(16)));
 
 #ifdef __SSE2__
 /* Both _Float16 and __bf16 require SSE2 being enabled. */
+typedef _Float16 __v4hf __attribute__((__vector_size__(8)));
 typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
 typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
 typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
 
 typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16)));
 typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
+#else
+/* Use __fp16 when _Float16 is not supported. */
+typedef __fp16 __v4hf __attribute__((__vector_size__(8)));
+typedef __fp16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
 #endif
 
 /* Define the default attributes for the functions in this file. */
diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h
index 94a662c1d93a8..98b25f5a56953 100644
--- a/clang/lib/Headers/f16cintrin.h
+++ b/clang/lib/Headers/f16cintrin.h
@@ -39,7 +39,8 @@ static __inline float __DEFAULT_FN_ATTRS128
 _cvtsh_ss(unsigned short __a)
 {
   __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
-  __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
+  __v4hi __w = __builtin_shufflevector(__v, __v, 0, 1, 2, 3);
+  __v4sf __r = __builtin_convertvector((__v4hf)__w, __v4sf);
   return __r[0];
 }
 
@@ -109,7 +110,8 @@ _cvtsh_ss(unsigned short __a)
 static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtph_ps(__m128i __a)
 {
-  return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
+  __v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3);
+  return __builtin_convertvector((__v4hf)__v, __v4sf);
 }
 
 /// Converts a 256-bit vector of [8 x float] into a 128-bit vector
@@ -153,7 +155,7 @@ _mm_cvtph_ps(__m128i __a)
 static __inline __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtph_ps(__m128i __a)
 {
-  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
+  return __builtin_convertvector((__v8hf)__a, __v8sf);
 }
 
 #undef __DEFAULT_FN_ATTRS128
diff --git a/clang/test/CodeGen/X86/f16c-builtins-constrained.c b/clang/test/CodeGen/X86/f16c-builtins-constrained.c
index bbd4d8f83b53a..5716f09a677f5 100644
--- a/clang/test/CodeGen/X86/f16c-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/f16c-builtins-constrained.c
@@ -16,7 +16,7 @@ float test_cvtsh_ss(unsigned short a) {
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _cvtsh_ss(a);
@@ -38,7 +38,7 @@ unsigned short test_cvtss_sh(float a) {
 
 __m128 test_mm_cvtph_ps(__m128i a) {
   // CHECK-LABEL: test_mm_cvtph_ps
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: call {{.*}}<4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
   return _mm_cvtph_ps(a);
 }
diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
index 3c6d64c225b32..61ffa24e6f5c7 100644
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -16,7 +16,7 @@ float test_cvtsh_ss(unsigned short a) {
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: fpext <4 x half> %{{.*}} to <4 x float>
   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _cvtsh_ss(a);
@@ -35,7 +35,7 @@ unsigned short test_cvtss_sh(float a) {
 
 __m128 test_mm_cvtph_ps(__m128i a) {
   // CHECK-LABEL: test_mm_cvtph_ps
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: fpext <4 x half> %{{.*}} to <4 x float>
   return _mm_cvtph_ps(a);
 }

moorabbit · 2025-08-10T11:40:05Z

I couldn't get __builtin_convertvector to emit an fpext instruction instead of an sitofp when converting a 16-bit half-precision float into a 32-bit float.

I tried to solve this by defining __v4hf and __v8hf types (vectors of 4 and 8 half-precision floats) and casting the operand of __builtin_convertvector to one of them, so that Clang recognizes the vector elements as floating types rather than integers.

RKSimon · 2025-08-10T11:47:42Z

clang/lib/Headers/f16cintrin.h

-  __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
+  __v4hi __w = __builtin_shufflevector(__v, __v, 0, 1, 2, 3);
+  __v4sf __r = __builtin_convertvector((__v4hf)__w, __v4sf);
  return __r[0];


Does this work consistently? I haven't properly compared the final asm at different -O levels.

float _cvtsh_ss(unsigned short __a) { return (float)__builtin_bit_cast(_Float16, __a); }

Thx, LGTM: https://godbolt.org/z/Pcr7aYeKE.
I wasn't aware of the builtin.

RKSimon · 2025-08-10T11:48:56Z

clang/lib/Headers/emmintrin.h

+#else
+/* Use __fp16 when _Float16 is not supported. */
+typedef __fp16 __v4hf __attribute__((__vector_size__(8)));
+typedef __fp16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));


I don't understand why this is necessary?

Some tests were failing due to _Float16 not being supported on i686-*.
Maybe it's better to just add the -target-feature +sse2 flag in the failing tests to force support of _Float16?

We intentionally require SSE2 for the _Float16 type.

Would we be better off just defining the types inside the intrinsics?

That sounds better. Thx!

moorabbit · 2025-08-12T12:27:24Z

I'm also using __fp16 instead of _Float16, because __fp16 is supported on all targets whereas _Float16 requires SSE2.

RKSimon

one final minor

RKSimon · 2025-08-12T13:39:16Z

clang/lib/Headers/f16cintrin.h

+  typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));
+
+  __v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3);
+  return __builtin_convertvector((__v4fp16)__v, __v4sf);


return (__m128)__builtin_convertvector((__v4fp16)__v, __v4sf);

RKSimon · 2025-08-12T13:39:37Z

clang/lib/Headers/f16cintrin.h

-  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
+  typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));
+
+  return __builtin_convertvector((__v8fp16)__a, __v8sf);


return (__m256)__builtin_convertvector((__v8fp16)__a, __v8sf);

github-actions · 2025-08-12T13:55:41Z

✅ With the latest revision this PR passed the C/C++ code formatter.

RKSimon

LGTM - cheers

moorabbit · 2025-08-12T14:45:24Z

Thx for the review!

FYI, I don't have commit access yet, so I can't merge my PR. Someone needs to do it on my behalf.

carlosgalvezp · 2025-08-20T19:23:31Z

Hi!

It appears this commit breaks our code, with the error:

error: use of undeclared identifier '__builtin_ia32_vcvtph2ps256';

I could not find any mention of this change in the Release Notes, nor documentation on how to migrate from it. Could that be added?

topperc · 2025-08-20T21:14:37Z

Hi!

It appears this commit breaks our code, with the error:
error: use of undeclared identifier '__builtin_ia32_vcvtph2ps256';
I could not find any mention of this change in the Release Notes, nor documentation on how to migrate from it. Could that be added?

Are you using the builtin directly instead of using one of the intrin.h headers?

phoebewang · 2025-08-21T03:26:10Z

We don't guarantee compatibility for builtins. You should use the intrinsic _mm256_mask_cvtph_ps instead. Please take care of the difference in argument order.

carlosgalvezp · 2025-08-21T05:29:52Z

Thank you for the quick response! Will try that out.

[Clang][X86] Replace F16C vcvtph2ps/256 intrinsics with __builtin_c…

ea2135f

…onvertvector The following intrinsics were replaced by a combination of `__builtin_shufflevector` and `__builtin_convertvector`: - `__builtin_ia32_vcvtph2ps` - `__builtin_ia32_vcvtph2ps256` Fixes llvm#152749

llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang:codegen IR generation bugs: mangling, exceptions, etc. labels Aug 10, 2025

RKSimon reviewed Aug 10, 2025

View reviewed changes

RKSimon requested a review from phoebewang August 10, 2025 11:47

RKSimon reviewed Aug 10, 2025

View reviewed changes

moorabbit requested a review from RKSimon August 10, 2025 12:46

RKSimon's comment

98baf57

RKSimon reviewed Aug 12, 2025

View reviewed changes

RKSimon's comment llvm#2

b50d611

clang-format

77ce810

RKSimon approved these changes Aug 12, 2025

View reviewed changes

Merge branch 'main' into replace-ia32-vcvtph2ps-256-intrinsics

fb8532a

RKSimon merged commit f8653ce into llvm:main Aug 12, 2025
9 checks passed

moorabbit deleted the replace-ia32-vcvtph2ps-256-intrinsics branch August 12, 2025 15:33

[Clang][X86] Replace F16C vcvtph2ps/256 intrinsics with (convert|shuffle)vector builtins #152911

[Clang][X86] Replace F16C vcvtph2ps/256 intrinsics with (convert|shuffle)vector builtins #152911

Uh oh!

Conversation

moorabbit commented Aug 10, 2025

Uh oh!

llvmbot commented Aug 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Aug 10, 2025

Uh oh!

moorabbit commented Aug 10, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

moorabbit Aug 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

moorabbit commented Aug 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

github-actions bot commented Aug 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

moorabbit commented Aug 12, 2025

Uh oh!

Uh oh!

carlosgalvezp commented Aug 20, 2025

Uh oh!

topperc commented Aug 20, 2025

Uh oh!

phoebewang commented Aug 21, 2025

Uh oh!

carlosgalvezp commented Aug 21, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants

[Clang][X86] Replace F16C `vcvtph2ps/256` intrinsics with `(convert|shuffle)vector` builtins #152911

[Clang][X86] Replace F16C `vcvtph2ps/256` intrinsics with `(convert|shuffle)vector` builtins #152911

llvmbot commented Aug 10, 2025 •

edited

Loading

moorabbit Aug 10, 2025 •

edited

Loading

moorabbit commented Aug 12, 2025 •

edited

Loading

github-actions bot commented Aug 12, 2025 •

edited

Loading